# import data
sites <- read_excel(here("water_quality", "data_raw", "Water quality data.xlsx"), sheet = "sites") %>% 
  clean_names()
ysi <- read_excel(here("water_quality", "data_raw", "Water quality data.xlsx"), sheet = "ysi") %>% 
  rename_with(tolower) %>% # prevents clean_names() from adding extra underscores before caps
  clean_names() %>%
  mutate(site_code = if_else(site_code == "Corington", "COV",
                             if_else(site_code == "greenhEB", "GEB",
                                     site_code
                               ))) %>%
  rename(site_code_depth = site_code) %>%
  filter(as.character(date) != "2023-10-05") # removing incomplete/aborted day
entero <- read_excel(here("water_quality", "data_raw", "Water quality data.xlsx"), sheet = "enterococcus") %>% 
  clean_names()
boats <- read_excel(here("water_quality", "data_raw", "Water quality data.xlsx"), sheet = "boat activity") %>% 
  clean_names() %>%
  mutate(site_code = if_else(site_code == "GIA/LBI", "GIA", site_code)) # coding merged GIA/LBI sites as GIA for now
precip <- read_excel(here("water_quality", "data_raw", "Water quality data.xlsx"), sheet = "precipitation") %>% 
  clean_names() %>%
  right_join(data.frame(date = seq(ymd('2023-10-10'), as.Date(today()), by='day'))) %>%
  mutate(precipitation_in = replace_na(precipitation_in, 0))

wq_data <- ysi %>%
  select(date, site_code_depth, temperature_c = temperature_c_24, ph, do_mg_l, do_percent_sat, conductivity_us_cm, sal_psu, phycoerythrin_rfu, chlorophyll_rfu) %>%
  drop_na() %>%
  left_join(ysi %>%
              select(date, site_code_depth, turbidity_fnu) %>%
              drop_na(),
            by = c("date", "site_code_depth")) %>%
  mutate(site_code = str_replace(site_code_depth, "[:digit:]", ""),
         depth_m = parse_number(site_code_depth),
         depth_m = replace_na(depth_m, 1)) %>% # surface-only sites are assumed to be approx 1m
  left_join(sites, by = c("site_code")) %>%
  left_join(entero, by = c("date", "site_code")) %>%
  left_join(boats, by = c("date", "site_code")) %>%
  mutate(n_colonies = replace_na(n_colonies, 0),
         entero_yn = if_else(n_colonies == 0, "Absent",
                             if_else(n_colonies > 0, "Present", "NA")),
         n_boats = replace_na(n_boats, 0),
         month = month(ymd(date), label = TRUE),
         year = year(ymd(date)),
         date_label = paste(as.character(month), as.character(year)),
         date_label = if_else(as.character(date) == "2024-04-29", "May 2024", date_label),
         date_cat = if_else(substr(date_label, 1, 3) %in% c("Dec", "Jan", "Feb", "Mar", "Apr"), "High season", "Low season"),
         site_cat = if_else(site_code %in% c("YIE", "GIE"), "Control", "Treatment")
         ) %>%
  select(date, date_label, date_cat, site_code_depth, site_code, depth_m, site_name, site_cat, latitude, longitude, n_boats, temperature_c, ph, do_mg_l, do_percent_sat, conductivity_us_cm, sal_psu, phycoerythrin_rfu, chlorophyll_rfu, turbidity_fnu, n_colonies, entero_yn) %>%
  distinct()

# streamlining data for graphs to include only regularly monitored sites and 1m depth
data_mon <- wq_data %>%
  filter(site_code %in% c("NBRB", "NBRM", "EC", "NBA", "LBI", "GIA", "ML", "RBA", "GOE", "TPBN", "GIE", "YIE", "DB", "MRCH", "COV", "GEB", "FHB", "YIN", "BDB", "LDB", "MRYC") & depth_m == 1)
# GEB currently combined as Exchange Bay - Club Hotel, also omitting EBB/EBBW, YRS, RRS
### need to organize dates from north-south
# Target ranges
date_min <- min(data_mon$date) # earliest sampling date
date_max <- max(data_mon$date) # most recent sampling date

ref_ph = data.frame(xmin = -Inf, xmax = Inf, ymin = 7.7, ymax = 8.5) # 7.5 and 8.4 units  (Rogers et al., 2001)
ref_sal = data.frame(xmin = -Inf, xmax = Inf, ymin = 32, ymax = 42) # 32,000 to 42,000 ppm (NOAA)
ref_turb = data.frame(xmin = -Inf, xmax = Inf, ymin = 0, ymax = 2) # <2 NTU (EPA)
ref_do = data.frame(xmin = -Inf, xmax = Inf, ymin = 6, ymax = Inf) # Must be greater than 6.0 ppm  (EPMA, 2019)
ref_temp = data.frame(xmin = -Inf, xmax = Inf, ymin = 23, ymax = 29.6) # 23°–29°Celsius (Coral Reef Alliance)
ref_temp_bleaching = 30.63 # 30.63C bleaching threshold (NOAA)
ref_ent = data.frame(xmin = -Inf, xmax = Inf, ymin = 0, ymax = 7) # below 7/100mL (EPA)

ylims_ph <- c((min(data_mon$ph, na.rm = T) - 1), 9)
ylims_turb <- c((min(data_mon$turbidity_fnu, na.rm = T) - 1), (max(data_mon$turbidity_fnu, na.rm = T) + 1))
ylims_do <- c((min(data_mon$do_mg_l, na.rm = T) - 1), (max(data_mon$do_mg_l, na.rm = T) + 1))
ylims_temp <- c((min(data_mon$temperature_c, na.rm = T) - 1), (max(data_mon$temperature_c, na.rm = T) + 1))
ylims_phyco <- c((min(data_mon$phycoerythrin_rfu, na.rm = T) - 1), (max(data_mon$phycoerythrin_rfu, na.rm = T) + 1))
ylims_chlor <- c((min(data_mon$chlorophyll_rfu, na.rm = T) - 1), (max(data_mon$chlorophyll_rfu, na.rm = T) + 1))
ylims_entero <- c(0, (max(data_mon$n_colonies, na.rm = T) + 10))

c_range <- "gray20" # setting color for target range in graphs
a_range <- 0.2 # setting alpha (transparency) for target range in graphs

Writing formulas for standard graph types

# flipped point plots by site, with and without reference boxes

flpoint_site <- function(data_wq, y, ylab, ylims) {
  ggplot() +
    geom_point(data = data_wq, 
             aes(site_name, {{y}}, color = date_cat),
             alpha = 0.6) +
    labs(x = "", y = ylab, color = "Season") +
    ylim(ylims) +
    theme_bw() +
    coord_flip()
}

flpoint_site_ref <- function(ref_data, data_wq, y, ylab, ylims) {
  ggplot() +
    geom_rect(data = ref_data, 
              aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = "What"),
              alpha = a_range) +
    scale_fill_manual(values = c_range, guide = "none") +
    geom_point(data = data_wq, 
             aes(site_name, {{y}}, color = date_cat),
             alpha = 0.6) +
    labs(x = "", y = ylab, color = "Season") +
    ylim(ylims) +
    theme_bw() +
    coord_flip()
}

# line plots over time, with and without reference boxes

line_time <- function(data_wq, y, ylab, ylims) {
  ggplot() +
    geom_line(data = data_wq,
            aes(x = date, y = {{y}}, group = site_code),
            color = "lightblue3") +
    geom_point(data = data_wq,
            aes(x = date, y = {{y}}, group = site_code),
            color = "slategray") +
    scale_x_datetime(date_breaks = "1 month",
                   date_labels = "%b") +
    labs(x = "", y = ylab) +
    ylim(ylims) +
    theme_bw()
}

line_time_ref <- function(data_ref, data_wq, y, ylab, ylims) {
  ggplot() +
    geom_rect(data = data_ref, 
              aes(xmin = as.POSIXct(-Inf), xmax = as.POSIXct(Inf) + 10000, ymin = ymin, ymax = ymax, fill = "What"),
              alpha = a_range) +
    scale_fill_manual(values = c_range, guide = "none") +
    geom_line(data = data_wq,
            aes(x = date, y = {{y}}, group = site_code),
            color = "lightblue3") +
    geom_point(data = data_wq,
            aes(x = date, y = {{y}}, group = site_code),
            color = "slategray") +
    scale_x_datetime(date_breaks = "1 month",
                   date_labels = "%b") +
    labs(x = "", y = ylab) +
    ylim(ylims) +
    theme_bw()
}
# for stack overflow

ref_do = data.frame(ymin = 6, ymax = Inf)

ggplot() +
    geom_rect(data = ref_do, 
              aes(xmin = min(data_mon$date), xmax = max(data_mon$date), ymin = 6, ymax = Inf, fill = "What"),
              alpha = 0.4) +
    scale_fill_manual(values = "slategray", guide = "none") +
    geom_line(data = data_mon,
            aes(x = date, y = do_mg_l, group = site_code),
            color = "lightblue3") +
    geom_point(data = data_mon,
            aes(x = date, y = do_mg_l, group = site_code),
            color = "slategray") +
    scale_x_datetime(date_breaks = "1 month",
                   date_labels = "%b") +
    labs(x = "Sampling date", y = expression("Dissolved oxygen (mg "*L^-1*")")) +
    theme_bw()
ggsave(here("water_quality", "figs", "2023-2024", "se1.png"))

ggplot() +
    geom_rect(data = ref_do, 
              aes(xmin = as.POSIXct("2020-01-01"), xmax = as.POSIXct("2030-01-01"), ymin = ymin, ymax = ymax, fill = "What"),
              alpha = 0.4) +
    scale_fill_manual(values = "slategray", guide = "none") +
    geom_line(data = data_mon,
            aes(x = date, y = do_mg_l, group = site_code),
            color = "lightblue3") +
    geom_point(data = data_mon,
            aes(x = date, y = do_mg_l, group = site_code),
            color = "slategray") +
    scale_x_datetime(date_breaks = "1 month",
                   date_labels = "%b") +
    labs(x = "Sampling date", y = expression("Dissolved oxygen (mg "*L^-1*")")) +
    theme_bw()
ggsave(here("water_quality", "figs", "2023-2024", "se2.png"))

ggplot() +
    geom_rect(data = ref_do, 
              aes(xmin = as.POSIXct(-Inf), xmax = as.POSIXct(Inf), ymin = ymin, ymax = ymax, fill = "What"),
              alpha = 0.4) +
    scale_fill_manual(values = "slategray", guide = "none") +
    geom_line(data = data_mon,
            aes(x = date, y = do_mg_l, group = site_code),
            color = "lightblue3") +
    geom_point(data = data_mon,
            aes(x = date, y = do_mg_l, group = site_code),
            color = "slategray") +
    scale_x_datetime(date_breaks = "1 month",
                   date_labels = "%b") +
    labs(x = "Sampling date", y = expression("Dissolved oxygen (mg "*L^-1*")")) +
    xlim(min(data_mon$date), max(data_mon$date)) +
    theme_bw()
ggsave(here("water_quality", "figs", "2023-2024", "se3.png"))

To do:

flpoint_site_ref(ref_ph, data_mon, ph, "pH", ylims_ph)

line_time_ref(ref_ph, data_mon, ph, "pH", ylims_ph)

flpoint_site_ref(ref_turb, data_mon, turbidity_fnu, "Turbidity (FNU)", ylims_turb)

line_time_ref(ref_turb, data_mon, turbidity_fnu, "Turbidity (FNU)", ylims_turb)

flpoint_site_ref(ref_do, data_mon, do_mg_l, expression("Dissolved oxygen (mg "*L^-1*")"), ylims_do)

line_time_ref(ref_do, data_mon, do_mg_l, expression("Dissolved oxygen (mg "*L^-1*")"), ylims_do)

flpoint_site_ref(ref_ent, data_mon, n_colonies, expression("Enterococcus (colonies "*mL^-1*")"), ylims_entero)

line_time_ref(ref_ent, data_mon, n_colonies, expression("Enterococcus (colonies "*mL^-1*")"), ylims_entero)

flpoint_site(data_mon, chlorophyll_rfu, "Chlorophyll a (RFU)", ylims_chlor)

line_time(data_mon, chlorophyll_rfu, "Chlorophyll a (RFU)", ylims_chlor)

flpoint_site(data_mon, phycoerythrin_rfu, "Phycoerythrin (RFU)", ylims_phyco)

line_time(data_mon, phycoerythrin_rfu, "Phycoerythrin (RFU)", ylims_phyco)

# temperature

ggplot() +
  # geom_rect(data = rect_dat_temp,
  #             aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = "What"),
  #             alpha = 0.4) +
  geom_hline(yintercept = ref_temp_bleaching, color = "salmon", linetype = "dashed") +
  # scale_fill_manual(values = c_range, guide = "none") +
  geom_line(data = data_mon,
            aes(x = date, y = temperature_c, group = site_code)) +
  scale_x_datetime(date_breaks = "1 month",
                   date_labels = "%b") +
  labs(x = "", y = expression("Temperature ("*~degree*C*")")) +
  theme_bw()

# dual y attempts

scale = .01 # relates y axis 1 with y axis 2

point_precip <- function(data, y, ylab, scale) {
  ggplot() +
    geom_point(data = data, 
               aes(x = date, y = {{y}}),
               color = "salmon") +
    geom_line(data = precip,
               aes(x = date, y = precipitation_in/scale),
              color = "black") +
    scale_y_continuous(
      name = ylab,
      sec.axis = sec_axis(~.*scale, name = "Precipitation (in/day)")
  ) +
    labs(x = "") +
    theme_bw()
}

point_precip(data_mon, n_colonies, expression("Enterococcus (colonies "*mL^-1*")"), 0.01)
point_precip(data_mon, turbidity_fnu, "Turbidity (FNU)", .1)

Boat play

ggplot(data_mon %>% filter(site_code %in% c("GIA", "RBA", "NBA", "GOE")), 
       aes(x = n_boats, y = n_colonies)) +
  geom_point(color = "slategray", alpha = 0.8) +
  labs(x = "Number of boats in anchorage", y = expression("Enterococcus (colonies "*mL^-1*")"), 0.01) +
  theme_bw()

Precipitation play

ggplot() +
  geom_line(data = precip, 
             aes(x = date, y = precipitation_in)) +
  labs(x = "", y = "Precipitation (inches)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# trying to calculate the amount of rainfall preceding a testing day
ysi_dates <- ysi %>%
  select(date) %>%
  distinct() # need to clarify which dates were actual monitoring days

precip_wk <- ysi %>%
  select(end_date = date) %>%
  distinct() %>%
  mutate(start_date = end_date - days(7)) %>%
  mutate(period = interval(ymd(start_date), ymd(end_date))) %>% # find a way to label this
  fuzzy_left_join(precip, by = c("start_date" = "date", "end_date" = "date"),
                          match_fun = list(`<=`, `>=`)) %>%
  group_by(period) %>%
  summarize(precip_wkprior = sum(precipitation_in))

precip_48hr <- ysi %>%
  select(end_date = date) %>%
  distinct() %>%
  mutate(start_date = end_date - days(2)) %>%
  mutate(period = interval(ymd(start_date), ymd(end_date))) %>% # find a way to label this
  fuzzy_left_join(precip, by = c("start_date" = "date", "end_date" = "date"),
                          match_fun = list(`<=`, `>=`)) %>%
  group_by(period) %>%
  summarize(precip_wkprior = sum(precipitation_in))

Map play

# Antigua shapefile
atg <- st_read(here("mapping", "shapefiles", "atg_adm_2019_shp", "atg_admbnda_adm1_2019.shp")) %>%
  st_union() %>%
  st_sf()
## Reading layer `atg_admbnda_adm1_2019' from data source 
##   `/Users/gizmo/Github/emc/mapping/shapefiles/atg_adm_2019_shp/atg_admbnda_adm1_2019.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 8 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -62.34903 ymin: 16.93153 xmax: -61.65653 ymax: 17.72958
## Geodetic CRS:  WGS 84
# set lat/lon for target range
lons = c(-61.72, -61.65)
lats = c(17.03, 17.10)

ggplot() +
  geom_sf(data = atg, fill = "slategray", color = "slategray") + # ATG basemap
  coord_sf(xlim = lons, ylim = lats, expand = FALSE) + # setting map limits
  geom_point(data_mon, 
             mapping = aes(x = longitude, y = latitude, color = entero_yn, size = n_colonies), 
             alpha = 0.7) + # add sites
  scale_color_manual(values = c("turquoise", "tomato")) +
  facet_wrap(. ~ factor(date_label, levels = c("Oct 2023", "Nov 2023", "Dec 2023", "Jan 2024", "Feb 2024", "Mar 2024", "Apr 2024", "May 2024", "Jun 2024"))) +
  labs(size = expression("Colonies "*mL^-1),
       color = "Enterococcus detection") +
  theme_bw() +
  theme(axis.title = element_blank(), 
        axis.text = element_blank(),
        axis.ticks = element_blank())

ggsave(here("water_quality", "figs", "2023-2024", "entero_map.png"), width = 10, height = 8)

Sargassum play

sites_sarg <- c("EBB", "EBBW", "YIE", "GIE")
sarg <- function(y, ylab) {
  ggplot(wq_data %>% filter(site_code %in% sites_sarg),
       aes(x = date, y = {{y}})) +
    geom_point(aes(color = as.character(depth_m))) +
    facet_grid(. ~ site_cat) +
    theme_bw()
}

sarg(ph, "pH")

sarg(do_mg_l, "DO")

sarg(turbidity_fnu, "Turbidity (FNU)")